Inspect missings in markers and individuals
#missing data per marker
mrk <- read.csv2("LM3_F7_K.MARKER.genoSUMMARY.csv", header = T, sep = ",")
head(mrk)
## marker AX EX NA. tot
## 1 Peex113Ctg00004_119009 107 54 34 195
## 2 Peex113Ctg00043_151704 105 58 32 195
## 3 Peex113Ctg00050_84548 101 51 43 195
## 4 Peex113Ctg00050_84557 103 52 40 195
## 5 Peex113Ctg00050_84666 106 72 17 195
## 6 Peex113Ctg00050_485230 106 72 17 195
mrks <- melt(mrk)
mrks <- mrks %>% filter(variable != "tot")
#check for exessive missings
mrk[which(mrk$NA. > 100),]
## [1] marker AX EX NA. tot
## <0 rows> (or 0-length row.names)
ggplot(data = mrks, aes(value, fill = variable)) +
geom_density(alpha = 0.2)
#missing data per individual
indv <- read.csv2("LM3_F7_K.INDIVIDUAL.genoSUMMARY.csv", header = T, sep = ",")
indvs <- melt(indv)
head(indv)
## individual AX EX NA. tot
## 1 RIL_196 604 1226 22 1852
## 2 RIL_197 1103 537 212 1852
## 3 RIL_198 456 994 402 1852
## 4 RIL_199 1343 475 34 1852
## 5 RIL_200 934 557 361 1852
## 6 RIL_201 690 730 432 1852
head(indvs)
## individual variable value
## 1 RIL_196 AX 604
## 2 RIL_197 AX 1103
## 3 RIL_198 AX 456
## 4 RIL_199 AX 1343
## 5 RIL_200 AX 934
## 6 RIL_201 AX 690
indvs <- indvs %>% filter(variable != "tot")
#distribution of missings and ax and ex genotypes
ggplot(data = indvs, aes(value, variable, color = variable)) +
geom_point(alpha = 0.2)
#ax vs ex
ggplot(data = indv, aes(AX, EX)) +
geom_point(alpha = 0.2)
#missings
ggplot(data = indv, aes(NA.)) +
geom_histogram(alpha = 0.2)
#remove those 4 individuals from the genetic map building
indv[which(indv$NA. > 1000),]
## individual AX EX NA. tot
## 29 RIL_35 12 22 1818 1852
## 38 RIL_44 7 15 1830 1852
## 63 RIL_67 10 19 1823 1852
## 88 RIL_88 200 553 1099 1852
# 4 individuals removed and altered as LM3_F7_K.markers.clean.csv
f7_K <- read.cross(format = "csv", file = "LM3_F7_K.markers.clean.csv", F.gen = 7, genotypes = c("AX", "HET", "EX"))
## --Read the following data:
## 191 individuals
## 1852 markers
## 1 phenotypes
## --Cross type: bcsft
f7_K <- convert2riself(f7_K)
#save duplicates and corresponding markers in bins to reconstruct them later
duplicatesf7_K <- findDupMarkers(f7_K)
f7_dups <- do.call(rbind, lapply(seq_along(duplicatesf7_K), function(i){
data.frame(bin_name=names(duplicatesf7_K)[i], duplicatesf7_K[[i]])}))
head(f7_dups)
## bin_name duplicatesf7_K..i..
## 1 Peex113Ctg00050_84666 Peex113Ctg00050_485230
## 2 Peex113Ctg00050_485239 Peex113Ctg00050_485279
## 3 Peex113Ctg05141_86416 Peex113Ctg05141_86419
## 4 Peex113Ctg18048_296982 Peex113Ctg05145_130001
## 5 Peex113Ctg18048_296982 Peex113Ctg00860_156636
## 6 Peex113Ctg18048_296982 Peex113Ctg09060_154418
write.table(f7_dups,file = "LM3_F7_K.marker.dupbins1.txt", quote = FALSE, row.names = FALSE)
#remove duplicate markers
f7_K <- pullCross(f7_K, type = "co.located")
#set bychr = FALSE to allow complete reconstruction of map
map1 <- mstmap(f7_K, pop.tpye = "ARIL", bychr = F, dist.fun = "kosambi", trace = TRUE, detectBadData = T, p.value = 1e-09, , mvest.bc = TRUE, return.imputed = T)
#sort markers in each LG
map1 <- mstmap(map1, pop.tpye = "ARIL", bychr = T, dist.fun = "kosambi", trace = TRUE, detectBadData = T, p.value = 1e-09, , mvest.bc = TRUE, return.imputed = T)
#inspect results
summary(map1)
## RI strains via selfing
##
## No. individuals: 191
##
## No. phenotypes: 1
## Percent phenotyped: 100
##
## No. chromosomes: 12
## Autosomes: L.1 L.10 L.11 L.12 L.2 L.3 L.4 L.5 L.6 L.7 L.8 L.9
##
## Total markers: 744
## No. markers: 182 3 1 1 82 93 79 144 75 70 4 10
## Percent genotyped: 89.8
## Genotypes (%): AA:46.4 BB:53.6
geno.image(map1)
head(geno.table(map1))
## chr missing AA BB P.value
## Peex113Ctg17884_167193 L.1 45 90 56 0.004895054
## Peex113Ctg18307_162364 L.1 30 75 86 0.385985052
## Peex113Ctg18308_61078 L.1 23 82 86 0.757620724
## Peex113Ctg18308_60876 L.1 24 81 86 0.698821641
## Peex113Ctg18307_162308 L.1 50 55 86 0.009036479
## Peex113Ctg17849_345854 L.1 29 77 85 0.529650670
#geno.image(map1, col = c("white", "darkorange", "lightblue"))
heatMap(map1, lmax = 25)
nmar(map1)
## L.1 L.10 L.11 L.12 L.2 L.3 L.4 L.5 L.6 L.7 L.8 L.9
## 182 3 1 1 82 93 79 144 75 70 4 10
summary(map1)
## RI strains via selfing
##
## No. individuals: 191
##
## No. phenotypes: 1
## Percent phenotyped: 100
##
## No. chromosomes: 12
## Autosomes: L.1 L.10 L.11 L.12 L.2 L.3 L.4 L.5 L.6 L.7 L.8 L.9
##
## Total markers: 744
## No. markers: 182 3 1 1 82 93 79 144 75 70 4 10
## Percent genotyped: 89.8
## Genotypes (%): AA:46.4 BB:53.6
Expected 7 LGs in Petunia, therefore merge small LGs into a single one and try to push markers onto the 7 large LGs
#put all small LGs in to one
nmar(map1)
## L.1 L.10 L.11 L.12 L.2 L.3 L.4 L.5 L.6 L.7 L.8 L.9
## 182 3 1 1 82 93 79 144 75 70 4 10
row.names(as.data.frame(unlist(nmar(map1)[nmar(map1) < 20])))
## [1] "L.10" "L.11" "L.12" "L.8" "L.9"
map2 <- mergeCross(map1, merge = list(
"UNKNOWN" =row.names(as.data.frame(unlist(nmar(map1)[nmar(map1) < 20])))
))
heatMap(map2)
## Warning in heatMap(map2): Running est.rf.
#try to assign them to chromosomes
#map2 <- pushCross(map2, type = "unlinked", unlinked.chr = "UNKNOWN" )
#map3 <- mstmap(map2, bychr = TRUE, trace = TRUE, anchor = TRUE, p.value= 2)
#summary(map3)
nmar(map1)
## L.1 L.10 L.11 L.12 L.2 L.3 L.4 L.5 L.6 L.7 L.8 L.9
## 182 3 1 1 82 93 79 144 75 70 4 10
# summary(map2)
#no improvement
names(map2$geno)[8] <- "L.8"
profileGen(map2, bychr = FALSE, stat.type = c("xo", "dxo", "miss"), id = "Genotype", xo.lambda = 25, layout = c(1, 3), lty = 2)
write.cross(format = "csv", map2, "F7_Kmap2.asmap")
?profileGen
python ~/Tools/repositories/GBS2map/Rqtl_2_abh.py -i F7_Kmap2.asmap.csv -o F7_Kmap2.abhIN.csv -q2a
## Extracting 744 markers from 191 individuals
## Finished.
## QTL to ABHGENOTYPER
#test
library(ABHgenotypeR)
abh <- readABHgenotypes("F7_Kmap2.abhIN.csv", nameA = "A", nameB = "B")
plotGenos(abh)
## Warning: `panel.margin` is deprecated. Please use `panel.spacing` property
## instead
# NA's are mistakenly labelled as heterozygotes when heterozygotes are absent. Bug in abhgenotyper code
pes <- imputeByFlanks(abh)
## The absolute number of genotypes in inputGenos is:
## A B N
## 59261 68404 14439
##
## or in percentage
##
## A B H N
## A 41.703 48.137 10.161 NA
##
## The absolute number of genotypes in outputGenos is:
## A B N
## 64923 74568 2613
##
## or in percentage
##
## A B H N
## A 45.687 52.474 1.839 NA
pes <- correctStretches(pes)
## The absolute number of genotypes in inputGenos is:
## A B N
## 64923 74568 2613
##
## or in percentage
##
## A B H N
## A 45.687 52.474 1.839 NA
##
## The absolute number of genotypes in outputGenos is:
## A B N
## 64995 74496 2613
##
## or in percentage
##
## A B H N
## A 45.738 52.424 1.839 NA
pes <- imputeByFlanks(pes)
## The absolute number of genotypes in inputGenos is:
## A B N
## 64995 74496 2613
##
## or in percentage
##
## A B H N
## A 45.738 52.424 1.839 NA
##
## The absolute number of genotypes in outputGenos is:
## A B N
## 64995 74496 2613
##
## or in percentage
##
## A B H N
## A 45.738 52.424 1.839 NA
reportGenos(pes)
## The absolute number of genotypes in pes is:
## A B N
## 64995 74496 2613
##
## or in percentage
##
## A B H N
## A 45.738 52.424 1.839 NA
plotGenos(pes, chromToPlot = 1:7)
## Warning: `panel.margin` is deprecated. Please use `panel.spacing` property
## instead
# NOTE:
# NA's are mistakenly labelled as heterozygotes when heterozygotes are absent. Bug in abhgenotyper code
writeABHgenotypes(pes, "F7_Kmap3.abhOUT.csv")
python ~/Tools/repositories/GBS2map/Rqtl_2_abh.py -i F7_Kmap3.abhOUT.csv -o F7_Kmap3.asmapIN.csv -a2q
## Finished.
## ABHGENOTYPER to QTL
map3 <- read.cross(format = "csv", file ="F7_Kmap3.asmapIN.csv", F.gen = 7, genotypes = c("A", "HET", "B"))
## Warning in read.cross.csv(dir, file, na.strings, genotypes, estimate.map, : The following unexpected genotype codes were treated as missing.
## |N|
## --Read the following data:
## 191 individuals
## 744 markers
## 1 phenotypes
## Warning in summary.cross(cross): Strange genotype pattern.
## --Cross type: bcsft
map3 <- convert2riself(map3)
map3 <- pullCross(map3, type = "co.located")
#set bychr = FALSE to allow complete reconstruction of map
map4 <- mstmap(map3, pop.tpye = "ARIL", bychr = F, dist.fun = "kosambi", trace = TRUE, detectBadData = T, p.value = 1e-09, , mvest.bc = TRUE, return.imputed = T)
summary(map4)
## Warning in summary.cross(map4): Some markers at the same position on chr L.
## 1,L.11,L.2,L.3,L.4,L.5,L.6,L.7,L.8; use jittermap().
## RI strains via selfing
##
## No. individuals: 191
##
## No. phenotypes: 1
## Percent phenotyped: 100
##
## No. chromosomes: 11
## Autosomes: L.1 L.10 L.11 L.2 L.3 L.4 L.5 L.6 L.7 L.8 L.9
##
## Total markers: 417
## No. markers: 113 3 7 33 51 47 82 37 39 3 2
## Percent genotyped: 98.3
## Genotypes (%): AA:47.3 BB:52.7
heatMap(map4)
## Warning in heatMap(map4): Running est.rf.
plot.map(map4)
profileGen(map4, bychr = FALSE, stat.type = c("xo", "dxo", "miss"), id = "Genotype", xo.lambda = 25, layout = c(1, 3), lty = 2)
profileMark(map4, stat.type = c("seg.dist", "dxo", "erf", "lod"), id = "Genotype", layout = c(1, 4), type = "l")
## Warning in summary.cross(cross): Some markers at the same position on chr
## L.1,L.11,L.2,L.3,L.4,L.5,L.6,L.7,L.8; use jittermap().
#crossover events dropped from 11 to 9
mean(countxo(map2))
## [1] 13.03141
mean(countxo(map4))
## [1] 9.685864
summaryMap(map4)
## n.mar length ave.spacing max.spacing
## L.1 113 117.3 1.0 7.4
## L.10 3 0.3 0.1 0.1
## L.11 7 4.2 0.7 3.4
## L.2 33 28.1 0.9 6.6
## L.3 51 86.9 1.7 16.6
## L.4 47 58.5 1.3 11.8
## L.5 82 95.3 1.2 13.9
## L.6 37 53.4 1.5 10.7
## L.7 39 64.5 1.7 20.9
## L.8 3 0.0 0.0 0.0
## L.9 2 5.1 5.1 5.1
## overall 417 513.5 1.3 20.9